from extract_strings import load_strings_file, untranslated_count_for_lang, extract_strings_from_c_files, get_missing_for_language, dump_missing_per_language, write_out_strings_files, key_sort_func, load_lang_index
from util import group, uniquify, ul_cb, s3connection, s3PubBucket, s3UploadFilePublic, s3UploadDataPublic
import simplejson
import os.path
import re

# Cleared below when a module needed for the S3 upload cannot be imported;
# gen_and_upload_js() checks this flag before uploading.
g_can_upload = True
# "src" directory, located one level above the directory holding this script
g_src_dir = os.path.join(os.path.split(__file__)[0], "..", "src")

# Both imports are optional. Only a failed import disables the upload;
# anything else (KeyboardInterrupt, SystemExit, ...) is allowed to propagate
# instead of being swallowed by a bare except.
try:
    import boto.s3
    from boto.s3.key import Key
except ImportError:
    print("You need boto library (http://code.google.com/p/boto/)")
    print("svn checkout http://boto.googlecode.com/svn/trunk/ boto")
    print("cd boto; python setup.py install")
    g_can_upload = False

try:
    import awscreds
except ImportError:
    print("awscreds.py not present")
    g_can_upload = False

# name passed to s3UploadDataPublic() for the generated JavaScript data
S3_JS_NAME = "blog/sumatrapdf-langs.js"
# number of missing translations at which a language is considered
# incomplete (and will be excluded from Translations_txt.cpp)
INCOMPLETE_MISSING_THRESHOLD = 40

# Template of the generated C source file. gen_c_code() fills in the
# %(langs_count)d, %(translations_count)d, %(lang_data)s and
# %(translations)s placeholders with the % operator.
TRANSLATIONS_TXT_C = """\
/* Generated by scripts\\update_translations.py
   DO NOT EDIT MANUALLY */

#ifndef MAKELANGID
#include <windows.h>
#endif

#define LANGS_COUNT   %(langs_count)d
#define STRINGS_COUNT %(translations_count)d

typedef struct {
    const char *code;
    const char *fullName;
    LANGID id;
    BOOL isRTL;
} LangDef;

#define _LANGID(lang) MAKELANGID(lang, SUBLANG_NEUTRAL)

LangDef gLangData[LANGS_COUNT] = {
    %(lang_data)s
};

#undef _LANGID

const char *gTranslations[LANGS_COUNT * STRINGS_COUNT] = {
%(translations)s
};
"""

# Use octal escapes rather than hexadecimal ones: a C hexadecimal escape has
# no fixed length and swallows every hex digit that follows it (so "\xABc"
# is a single escape), whereas an octal escape is at most three digits long
# and "\253c" is unambiguous.
def c_oct(c):
    """Return the three-digit C octal escape for the one-character string c.

    The value is formatted directly with "%03o" so that the result does not
    depend on the textual form of oct(), which differs between Python
    versions ("0253" vs. "0o253"). c is expected to have an ordinal that
    fits into a single byte.
    """
    return "\\%03o" % ord(c)

def c_escape(txt):
    """Return txt as a double-quoted, pure-ASCII C string literal.

    None is rendered as the bare token NULL. Double quotes are
    backslash-escaped and every byte >= 0x80 of the UTF-8 encoding is written
    as an octal escape. Backslashes are passed through unchanged.
    """
    if txt is None:
        return "NULL"
    # escape all quotes
    txt = txt.replace('"', r'\"')
    # Walk the UTF-8 encoded bytes as integers: iterating a bytearray yields
    # ints on both Python 2 and Python 3, which avoids applying a text regex
    # to a byte string. 7-bit bytes are kept, all others are octal-escaped.
    escaped = "".join(
        chr(b) if b < 0x80 else c_oct(chr(b))
        for b in bytearray(txt.encode("utf-8")))
    return '"%s"' % escaped

def get_trans_for_lang(strings_dict, keys, lang_arg):
    """Return the translations into lang_arg for keys, in the order of keys.

    strings_dict maps each key to a sequence of (lang, translation) pairs.
    Only the first pair whose language equals lang_arg is considered. The
    entry is None when there is no such pair or when the translation is the
    same as the key itself (i.e. the same as the default text).
    """
    result = []
    for key in keys:
        # falling back to the key makes "not found" look like "same as default"
        found = next(
            (tr for (lang, tr) in strings_dict[key] if lang == lang_arg), key)
        result.append(None if found == key else found)
    return result

# code of the language whose strings are used as the translation keys
DEFAULT_LANG = "en"

def lang_sort_func(x, y):
    """cmp-style comparator for language entries indexed as (code, name, ...).

    The default language sorts before everything else; all other entries are
    ordered by their name (element 1). Returns a negative number, zero or a
    positive number, so it can be wrapped with functools.cmp_to_key().
    """
    # special case: default language is first
    if x[0] == DEFAULT_LANG:
        return -1
    if y[0] == DEFAULT_LANG:
        return 1
    # portable spelling of cmp(x[1], y[1]); the cmp builtin is Python 2 only
    return (x[1] > y[1]) - (x[1] < y[1])

def make_lang_ids(langs, lang_index):
    """Map language codes to the C expression for their Windows LANGID.

    lang_index rows are indexed as (code, language, sublanguage, ...):
      * language and sublanguage set -> MAKELANGID(LANG_x, SUBLANG_x_y)
      * only language set            -> _LANGID(LANG_x)
      * neither                      -> -1 (invalid LANGID)
    The generated expression is upper-cased and spaces in the sublanguage
    are replaced by underscores.

    Every entry of langs (indexed as (code, name)) whose code is missing
    from lang_index is reported with a warning and mapped to "-1".
    """
    lang_ids = {}
    for cols in lang_index:
        if cols[1] and cols[2]:
            lang_id = "MAKELANGID(LANG_%s, SUBLANG_%s_%s)" % (cols[1], cols[1], cols[2].replace(" ", "_"))
        elif cols[1]:
            lang_id = "_LANGID(LANG_%s)" % (cols[1])
        else:
            lang_id = "-1" # invalid LANGID
        lang_ids[cols[0]] = lang_id.upper()

    for lang in langs:
        if lang[0] not in lang_ids:
            # format the two fields explicitly so list entries work as well
            print("Warning: Missing LANGID for %s (%s)" % (lang[0], lang[1]))
            # key by the language code, which is what callers look up
            lang_ids[lang[0]] = "-1"

    return lang_ids

def make_lang_layouts(lang_index):
    """Map each language code (column 0) to 1 if column 3 is "RTL", else 0."""
    return {row[0]: (1 if row[3] == "RTL" else 0) for row in lang_index}

def gen_c_code(langs_ex, strings_dict, file_name, lang_index):
    """Render TRANSLATIONS_TXT_C for the given languages and write it out.

    langs_ex     -- list of language entries indexed as (code, name); it is
                    sorted in place with lang_sort_func, which puts the
                    default language first
    strings_dict -- maps every string (key) to its (lang, translation) pairs
    file_name    -- path of the file to write
    lang_index   -- rows consumed by make_lang_ids() / make_lang_layouts()
    """
    # cmp_to_key adapts the cmp-style comparators to the key= argument of
    # sort()/sorted(); passing a comparator directly is Python 2 only
    from functools import cmp_to_key

    langs_ex.sort(key=cmp_to_key(lang_sort_func))
    langs = [cols[0] for cols in langs_ex]
    assert DEFAULT_LANG == langs[0]

    keys = sorted(strings_dict, key=cmp_to_key(key_sort_func))
    lines = []
    for lang in langs:
        if DEFAULT_LANG == lang:
            # the keys themselves are the strings of the default language
            trans = keys
        else:
            trans = get_trans_for_lang(strings_dict, keys, lang)
        lines.append("")
        lines.append("  /* Translations for language %s */" % lang)
        lines += ["  %s," % c_escape(t) for t in trans]

    lang_ids = make_lang_ids(langs_ex, lang_index)
    lang_layouts = make_lang_layouts(lang_index)
    # A language missing from lang_index only triggers a warning in
    # make_lang_ids(), so fall back to 0 (not RTL) here instead of failing
    # with a KeyError.
    lang_data = ['{ "%s", %s, %s, %d },' % (lang[0], c_escape(lang[1]), lang_ids[lang[0]], lang_layouts.get(lang[0], 0)) for lang in langs_ex]

    # name the template fields explicitly instead of exposing all locals()
    file_content = TRANSLATIONS_TXT_C % {
        "langs_count": len(langs),
        "translations_count": len(strings_dict),
        "lang_data": "\n    ".join(lang_data),
        "translations": "\n".join(lines),
    }
    # the file is opened in binary mode, so hand it bytes
    if not isinstance(file_content, bytes):
        file_content = file_content.encode("utf-8")
    # the with-statement closes the file even if the write fails
    with open(file_name, "wb") as out:
        out.write(file_content)

def contributors_for_lang(contributors, lang):
    """Return a new sorted list of the contributors recorded for lang.

    Languages without an entry in contributors yield an empty list; the
    stored collection itself is left untouched.
    """
    names = list(contributors.get(lang, []))
    names.sort()
    return names

def gen_js_data(strings_dict, langs, contributors):
    """Build the per-language rows for the JavaScript status data.

    Each row is [lang_iso, lang_name, untranslated_count, svn_url,
    contributors]; the default language is skipped and anything from the
    first " (" onwards is cut off the language name. Rows are ordered by
    untranslated count (biggest first) and then by language name.
    """
    res = []
    for (lang_iso, lang_name) in langs:
        if DEFAULT_LANG == lang_iso:
            continue
        lang_name = lang_name.split(" (")[0]
        count = untranslated_count_for_lang(strings_dict, lang_iso)
        svnurl = "http://sumatrapdf.googlecode.com/svn/trunk/strings/" + lang_iso + ".txt"
        c = contributors_for_lang(contributors, lang_iso)
        res.append([lang_iso, lang_name, count, svnurl, c])
    # Two stable passes give the same order as the former cmp-style
    # comparator (which only works on Python 2): first the secondary key
    # (name, ascending), then the primary key (count, descending).
    res.sort(key=lambda row: row[1])
    res.sort(key=lambda row: row[2], reverse=True)
    return res

# Serialize the rows produced by gen_js_data() as JSON, i.e. an array of
# arrays in the format:
# [lang-iso-code, langname, untranslated_strings_count, svn_url, [contributors]]
# sorted by untranslated string count (biggest at the top), wrap it in a
# "var g_langsData = ...;" statement and upload the result under S3_JS_NAME.
# Only prints a message when uploading is not possible (g_can_upload unset).
def gen_and_upload_js(strings_dict, langs, contributors):
    if g_can_upload:
        rows = gen_js_data(strings_dict, langs, contributors)
        script = "var g_langsData = " + simplejson.dumps(rows) + ";\n"
        s3UploadDataPublic(script, S3_JS_NAME)
    else:
        print("Can't upload javascript to s3")

def get_untranslated_as_list(untranslated_dict):
    """Concatenate all value lists of untranslated_dict and uniquify the result."""
    merged = []
    for missing in untranslated_dict.values():
        merged.extend(missing)
    return uniquify(merged)

def remove_incomplete_translations(langs, strings, strings_dict, threshold):
    """Drop, in place, every language with at least threshold missing strings.

    The first entry of langs must be the default language; it is never
    checked. The number of missing strings is the length of what
    get_missing_for_language() returns for the language code.
    """
    assert langs[0][0] == DEFAULT_LANG
    incomplete = [
        candidate for candidate in langs[1:]
        if len(get_missing_for_language(strings, strings_dict, candidate[0])) >= threshold
    ]
    for candidate in incomplete:
        langs.remove(candidate)

def main():
    """Refresh the strings files, upload the status data and regenerate
    Translations_txt.cpp in the source directory."""
    (strings_dict, langs, contributors) = load_strings_file()
    strings = extract_strings_from_c_files()
    # Drop entries for strings that were not extracted from the C files.
    # Iterate over a snapshot of the keys: deleting from a dict while
    # iterating over its live key view raises a RuntimeError.
    for s in list(strings_dict.keys()):
        if s not in strings:
            del strings_dict[s]
    untranslated_dict = dump_missing_per_language(strings, strings_dict)
    write_out_strings_files(strings_dict, langs, contributors, untranslated_dict)
    untranslated = get_untranslated_as_list(untranslated_dict)
    # make sure every untranslated string has an (empty) entry as well
    for s in untranslated:
        if s not in strings_dict:
            strings_dict[s] = []

    c_file_name = os.path.join(g_src_dir, "Translations_txt.cpp")
    # the upload happens before incomplete languages are removed from langs
    gen_and_upload_js(strings_dict, langs, contributors)
    remove_incomplete_translations(langs, strings, strings_dict, INCOMPLETE_MISSING_THRESHOLD)
    gen_c_code(langs, strings_dict, c_file_name, load_lang_index())

# run the update only when executed as a script, not when imported
if __name__ == "__main__":
    main()
